* Preparation of STAR data for analysis.
* The code in this file builds on code provided by Bryan Graham.
use "webstar.dta", clear

* Keep only those observations with non-missing class type after class 1
* (star1, star2 and star3 all equal to 1).
keep if star1 == 1 & star2 == 1 & star3 == 1

* Instrument Z: first-year class type (which was fully randomized).
* Z = 1 when the class-type code of the first observed year is below 2.
* Both branches require a non-missing class type: Stata treats missing as
* larger than any number, so an unguarded (cltype1 < 2) would silently code
* a missing assignment as 0.
gen Z = (cltypek < 2) if stark == 1 & cltypek != .
replace Z = (cltype1 < 2) if stark == 2 & cltype1 != .

* Treatment dummy D: count the years with a class-type code below 2.
* D = 1 for more than two such years (out of four) when stark == 1 and for
* more than one (out of three) when stark == 2. A missing class type counts
* as "not small" here because (. < 2) evaluates to 0.
* NOTE(review): the original comment described D as "at least two years of
* small classes", which does not match the > 2 threshold used for
* stark == 1; confirm which is intended.
gen D = ((cltypek < 2) + (cltype1 < 2) + (cltype2 < 2) + (cltype3 < 2)) > 2 if stark == 1
replace D = ((cltype1 < 2) + (cltype2 < 2) + (cltype3 < 2)) > 1 if stark == 2
corr Z D
tabulate Z D

* School dummies - apparently randomization was on the school level.
* schid is the school id for the treatment assignment year: the kindergarten
* id when available, otherwise the first-grade id.
gen schid = schidkn
replace schid = schid1n if schid == .
xi, pre(SIK) i.schid
* Dummy for school 1 is created by hand (presumably because xi omits it as
* the base category - confirm). Missing schid yields 0, as before.
gen SIKschid_1 = (schid == 1)

* Individual covariates (0/1, missing when the source code is not usable).
gen black = (srace == 2) if srace != .
gen girl = (ssex == 2) if inlist(ssex, 1, 2)
gen poor = (sesk == 1) if inlist(sesk, 1, 2)

* ---------------------------------------------------------------------
* Impute missing poverty - courtesy of Bryan Graham's code.
* ---------------------------------------------------------------------
* For students with missing SES data use the nearest available measure,
* i.e. school lunch status in first grade, then second grade, then third
* grade. According to the original note this works for 17 of the 23
* kindergarten students with missing school lunch data.
* NOTE(review): the fallbacks are restricted to stark == 1, so observations
* with stark == 2 whose sesk is missing are not filled here and fall through
* to the school-median step below; confirm this is intended.
foreach later in ses1 ses2 ses3 {
    replace poor = 1 if poor == . & `later' == 1 & stark == 1
    replace poor = 0 if poor == . & `later' == 2 & stark == 1
}

* Remaining missing values of black and poor are replaced with the median
* within school (schid). The original note reports 8 such students (3 of
* them lacking race data) and speaks of school-by-grade medians, but the
* grouping used here is schid only.
* NOTE(review): the median of a 0/1 variable is 0.5 when a school is evenly
* split, so imputed values need not be binary.
bys schid: egen blacksm = median(black)
bys schid: egen poorsm = median(poor)
replace black = blacksm if black == .
replace poor = poorsm if poor == .
drop blacksm poorsm

* Normalized test score data: z-scores of the second and third grade math
* and reading scores (sample mean 0, sample sd 1 over the kept observations).
foreach grade in 2 3 {
    foreach subj in math read {
        egen `subj'mean`grade' = mean(t`subj'ss`grade')
        egen `subj'std`grade' = sd(t`subj'ss`grade')
        gen `subj'norm`grade' = (t`subj'ss`grade' - `subj'mean`grade') / `subj'std`grade'
        drop `subj'mean`grade' `subj'std`grade'
    }
}

* Average the two grade-level z-scores per subject ...
gen mathadd = (mathnorm2 + mathnorm3) / 2
gen readadd = (readnorm2 + readnorm3) / 2

* ... and standardize the averages again to obtain the outcomes math and read.
foreach subj in math read {
    egen `subj'mean = mean(`subj'add)
    egen `subj'std = sd(`subj'add)
    gen `subj' = (`subj'add - `subj'mean) / `subj'std
    drop `subj'mean `subj'std
}

* Summary statistics by instrument value; total holds the group size.
* estpost and esttab come from the user-written estout package.
bysort Z: gen total = _N
label var total "Observations"
estpost tabstat poor black girl D math read total, by(Z) listwise ///
    s(mean sd) columns(statistics)
esttab using sumstat.tex, main(mean %8.3f) aux(sd %8.3f) nostar nodepvar ///
    unstack nomtitle nonumber noobs ///
    addnotes("Mean of each variable with standard deviation in parentheses.") ///
    label ///
    title("Summary statistics for students assigned to small and regular classes\label{tab:sumstat}") ///
    replace nonotes